package com.mi.sql

import org.apache.spark.sql.SparkSession


/**
 * Spark job that prints, for every hospital code, the average of the
 * treatment-cost column, ranked from highest to lowest.
 *
 * Input is the CSV file `data/data.csv` (first row is a header). Only the
 * columns `医院编码_NN` and `治疗费发生金额_SUM` are used. The result is printed
 * with `show()`, which displays the first rows of the result only.
 */
object hospital {
  /**
   * Entry point: runs the aggregation on a local Spark session.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("MedicalInsurance").master("local").getOrCreate()
    // Stop the session even when reading or querying fails, so the local
    // Spark context is not leaked on error.
    try {
      val data = spark.read
        .option("header", "true")
        .csv("data/data.csv")

      // Keep only the two columns the query needs.
      val treatmentCostsDF = data.select("医院编码_NN", "治疗费发生金额_SUM")

      // getOrCreate() may hand back an existing session in which the view is
      // already registered; replacing it avoids an "already exists" failure.
      treatmentCostsDF.createOrReplaceTempView("treatment_costs")

      // Average treatment cost per hospital.
      // NOTE(review): the average is taken over CSV rows; it is a per-person
      // figure only if each row represents one person — confirm against the data.
      // The CSV is read without schema inference, so the cost column is a
      // string; the cast to DOUBLE is made explicit rather than left to
      // implicit coercion.
      // ORDER BY (not SORT BY) is required for a total ordering: SORT BY only
      // sorts within each partition.
      spark.sql(
        """
          SELECT
          `医院编码_NN`, AVG(CAST(`治疗费发生金额_SUM` AS DOUBLE)) AS `人均总治疗费用`
          FROM treatment_costs GROUP BY `医院编码_NN`
          ORDER BY `人均总治疗费用` DESC
        """).show()
    } finally {
      spark.stop()
    }
  }
}
